*****************************
*USER TO UPDATE FILE LOCATION
*****************************
* Root folder for every input and output file referenced in this script.
local location `file'

**********************
*MAIN DATASET 
**********************
* One-off conversion of the raw CSV to .dta, left commented out:
*insheet using "`location'\morningstar.csv",clear name
*save "`location'\morningstar_raw.dta",replace 

use "`location'\morningstar_raw.dta", clear

* 1 when the observation type is "Annual", 0 otherwise.
generate annualreport = observation_type == "Annual"

rename asx_code asxcode

* report_date is text: characters 1-4 are taken as the year, 6-7 as the month.
generate year  = substr(report_date, 1, 4)
generate month = substr(report_date, 6, 2)
* Give the source items short working names. The list below is read as
* consecutive (old name, new name) pairs and applied one rename at a time,
* in the order written.
local renamepairs ///
    /// BALANCE SHEET
    totalassetspreferred              assets              ///
    totaleq                           equity              ///
    totaldebt                         debt                ///
    totalcash                         cash                ///
    totalcurrentassets5020            curasset            ///
    totalcurrentliabilities6010       curliab             ///
    totalliabilitiespreferred         liab                ///
    totalinventories5000              inventories         ///
    /// This is net assets as it subtracts accumulated depreciation
    totalfixedassets5030              fixedassets         ///
    totalfixedassets11080             fixedassets_finance ///
    /// Subset of total fixed assets
    plantequipmentvehicles552         ppe                 ///
    lessaccumulateddepreciationppe55  accdep              ///
    leasedassets562                   leaseassets         ///
    leaseliabilities311               leaseliab           ///
    /// P&L
    revenuepref                       revenue             ///
    ebit8012                          ebit                ///
    ebitdapreferred                   ebitda              ///
    netinterestexpensespreferred      netintexp           ///
    interestrevenue8014               intinc              ///
    interestexpense8016               intexp              ///
    depreciationandamortisation8010   depreciation        ///
    /// Cash flow statement
    capitalexpenditurepurchaseppe915  capex               ///
    /// This is a subset of total capex
    purchaseoffixedassets850          kpurchase           ///
    interestexpenseborrowingcosts65   intexp_cf           ///
    /// Shares
    totalsharesout                    sharesout           ///
    ordinaryshares                    ordshares           ///
    preferenceshares                  prefshares          ///
    /// Staff
    staffandemployeeexpenses2200      wagebill            ///
    numberofemployees708              employees

while "`renamepairs'" != "" {
    gettoken oldname renamepairs : renamepairs
    gettoken newname renamepairs : renamepairs
    rename `oldname' `newname'
}

*IMPORTANT 
* Items that are both kept and converted from text to numeric. Holding the
* list in one place keeps the keep and destring steps in agreement.
local numvars assets curass curliab liab fixedasset* equity debt cash ebit ebitda ///
    revenue capex depreciation intexp* netintexp intinc ppe accdep kpurchase ///
    inventories leaseassets leaseliab sharesout ordshares prefshares wagebill employees

keep asxcode annualreport gics* year month name `numvars'
destring `numvars', replace ignore("NA")

* The date parts were extracted as text; convert them to numbers as well.
destring year month, replace ignore("NA" "/")

* Numeric firm identifier, one value per ASX code.
egen id = group(asxcode)

* Fiscal year: report months after June are assigned to the following year.
* Stata ranks a missing value above every number, so the comparison is guarded
* to stop rows with an unknown month being pushed into the following year.
generate fyear = year
replace fyear = year + 1 if month > 6 & !missing(month)

*Most are missing anyway...
* Drop firm-fiscal-years that have more than two observations.
bysort id fyear: generate count = _N
drop if count > 2
drop count

*Semi-annual period within the calendar year: 1 for months up to June, 2 after.
* A missing month leaves period missing, and therefore hyear missing too.
generate period = 1 if month <= 6
replace period = 2 if month > 6 & !missing(month)

*Based on calendar year 
generate hyear = yh(year, period)
format %th hyear

* Keep a single observation per firm and half-year.
duplicates report asxcode hyear
duplicates drop asxcode hyear, force

* Multiply these items by -1.
foreach item in capex kpurchase intexp deprec wagebill {
    replace `item' = `item'*-1
}

*Fixed assets for finance companies 
* Fill fixedassets from the finance item only where fixedassets itself is missing.
replace fixedassets = fixedassets_finance if fixedassets == . & fixedassets_finance != .
drop fixedassets_finance

* Report month, recorded on annual-report rows only.
generate fyend = month if annualreport == 1

*Merging turnover and share prices 
merge 1:1 asxcode hyear using "`location'\shprice_1997_2020.dta" , nogen keep(match master)

save "`location'\morningstar_semiannual.dta", replace

*NOTE: there is a different fixed-asset item for financial companies, should we choose to go down that path.


**********************
*SENTIMENT INDICATOR 
**********************

* Convert each yearly connect4 CSV (2003 to 2020) to .dta. The firm code is the
* upper-cased first three characters of name.
forvalues yr = 2003/2020 {
    insheet using "`location'\connect4_`yr'.csv", clear
    generate firm_code = upper(substr(name, 1, 3))
    keep firm_code year count* word*
    save "`location'\connect4_`yr'.dta", replace
}

* Stack the yearly files into one dataset.
clear
forvalues yr = 2003/2020 {
    append using "`location'\connect4_`yr'.dta"
}

* Net positive words and uncertainty words, each per 1000 words.
generate sentiment = (countpos - countneg)/wordcount
replace sentiment = sentiment*1000
bysort year: summarize sentiment, detail

generate uncertainty = countunc/word
replace uncertainty = uncertainty*1000
bysort year: summarize uncertainty, detail

generate fyear = year

* Within each firm-year keep the rows with the largest word count, then force
* a single row per firm-year.
bysort firm_code fyear: egen maxcount = max(wordcount)
keep if wordcount == maxcount
drop maxcount

duplicates report firm_code fyear
duplicates drop firm_code fyear, force
rename wordcount countwords
save "`location'\sentiment.dta", replace

*Additional MS data 

**********************
*Company listing dates
**********************
*Used for age variable 
import excel using "`location'\D21 244102  Listed Companies - Locations.XLSX", clear first
rename ABN abn
rename Symbol asxcode

* Only three-character codes are kept, one row per code.
keep if length(asxcode) == 3
bysort asxcode: keep if _n == 1

* ListDate is text: characters 1-2 are taken as the month, 7-10 as the year.
generate listyear  = substr(ListDate, 7, 4)
generate listmonth = substr(ListDate, 1, 2)
destring listmonth listyear, replace force
keep asxcode abn listmonth listyear

* A list year of 1900 is treated as missing.
replace listyear = . if listyear == 1900
save "`location'\ms_listing.dta", replace

******************************************
*EIKON SHARE PRICES AND OUTSTANDING SHARES
******************************************

import excel using "`location'\Eikon.xlsx", clear sheet(STATA) first

* The firm code is characters 3-5 of the Eikon code.
generate firm_code = substr(code, 3, 3)
keep firm_code name shprice* outstanding*
destring shprice* outstanding*, replace force

* Rows with an empty name or an error marker carry no usable data.
drop if inlist(name, "", "#ERROR")

* One row per firm and year; zeros are recoded to missing.
reshape long shprice outstanding, i(firm_code name) j(year)
foreach series in shprice outstanding {
    replace `series' = . if `series' == 0
}
drop if year < 2003

rename (shprice outstanding) (shprice_eikon outstanding_eikon)
save "`location'\shprice_eikon.dta", replace

*****************************
*EIKON EPS FORECASTS 
*****************************
* Lookup table from a shortened company name to the Eikon code.
import excel using "`location'\Eikon.xlsx", clear sheet(MAR2021) first
drop if name == "#ERROR"

* Remove "LTD" from the name and trim surrounding blanks.
replace name = strtrim(subinstr(name, "LTD", "", .))
keep code name

* Short name: first 15 characters of the name with double blanks removed.
generate shortname = substr(name, 1, 15)
replace shortname = subinstr(shortname, "  ", "", .)

duplicates drop shortname, force
save "`location'\epsnametemp.dta", replace

* Read the EPS1 sheet without a header row, so columns arrive named A, B, ...
import excel using "`location'\Eikon.xlsx", clear sheet(EPS1)

* Column A holds a text label; columns B to AO are named eps1<year> for the
* years 1981 through 2020 so they can be reshaped long on the eps1 stub.
rename A name
local yr = 1981
foreach col in B C D E F G H I J K L M N O P Q R S T U V W X Y Z ///
               AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO {
    rename `col' eps1`yr'
    local ++yr
}

keep name eps1*
destring eps1*, replace force
drop if name == ""
drop if name == "#ERROR"
* NOTE(review): the first remaining row is dropped unconditionally; presumably
* it is the sheet's header row - confirm against the workbook.
drop in 1

* The company name is the text before the "FY1 EPS MEAN EST" marker, less the
* three characters immediately preceding it. Rows without the marker get an
* empty company name and are dropped.
generate indicator = strpos(name, "FY1 EPS MEAN EST")
generate company_name = substr(name, 1, indicator - 4) if indicator > 0
drop if company_name == ""
keep company_name eps1*

* reshape needs company_name to be unique, so keep exactly one row per name
* (dropping only the second copy would leave third and later duplicates).
bysort company_name: keep if _n == 1

reshape long eps1, i(company_name) j(year)
save "`location'\eps1temp.dta", replace

* Attach Eikon codes to the EPS forecasts by matching on a shortened name.
use "`location'\eps1temp.dta", clear

* Remove " LTD" from the company name, trim it, and keep the first 15 characters.
* NOTE(review): the lookup file strips "LTD" without the leading blank and also
* removes double blanks from its short name; confirm the two keys line up.
replace company_name = strtrim(subinstr(company_name, " LTD", "", .))
generate shortname = substr(company_name, 1, 15)

rename year fyear
drop if fyear < 2003

merge m:1 shortname using "`location'\epsnametemp.dta", nogen keep(match master)

* The ASX code is characters 3-5 of the Eikon code; rows without one are dropped.
generate asxcode = substr(code, 3, 3)
keep fyear eps1 asxcode name
drop if asxcode == ""
duplicates drop asxcode fyear, force
save "`location'\eps1.dta", replace



*****************
*Turnover measure 
*****************
*THIS is a semi annual measure 
* Builds turnover and volatility per asxcode and fyear, saved to turnover.dta.
* Starts from the share price file and adds shares outstanding by asxcode, year and month.
use "`location'\shprice_1997_2020.dta" , clear 
merge 1:1 asxcode year month using  "`location'\sharesoutstanding.dta" ,nogen keep(match master)

* NOTE(review): nothing between preserve and restore is saved, so the collapsed
* and merged data built here is discarded by restore. Confirm whether a save is
* missing or whether this section can be removed.
preserve 
collapse(mean) shprice (sum)volume, by(asxcode year month)
rename volume volume_all
replace volume_all = volume_all/1000
merge 1:1 asxcode year month using "`location'\endmonth_share_prices.dta", nogen keep(match master)
restore 


* Declare the panel by firm and half-year.
* NOTE(review): hyear and fyear are not created in this section; presumably
* they come from the input files - TODO confirm.
egen firmid = group(asxcode)
tsset firmid hyear 
* Volume is divided by 1000 before use.
replace volume = volume/1000 
* Turnover: volume over the one-period lag of outstanding.
g turnover = volume/l1.outstanding 
* First difference of the log share price.
g lshprice = log(shprice)
g dlshprice = d.lshprice
*Ideally this should be a time-varying measure so want to look at it at a daily frequency first!!!
* Volatility is one standard deviation per asxcode over all of its periods,
* so it does not vary over time within a firm.
bysort asxcode: egen volatility = sd(dlshprice)
* Only rows with month equal to 6 are kept, then averaged by asxcode and fyear.
keep if month == 6
collapse(mean) volatility turnover, by(asxcode fyear)
save "`location'\turnover.dta" , replace



*****************
*COMBINING DATA
*Adjustments for FY end 
*****************
* Start from the half-yearly firm dataset and attach the side datasets.
* Every merge keeps all master rows, matched or not.
use "`location'\morningstar_semiannual.dta", clear
generate firm_code = asxcode
merge m:1 firm_code fyear using "`location'\sentiment.dta", nogen keep(match  master)
merge m:1 firm_code year using "`location'\shprice_eikon.dta", nogen keep(match master)
merge m:1 asxcode fyear using "`location'\eps1.dta", nogen keep(match master)
merge m:1 asxcode using "`location'\ms_listing.dta", nogen keep(match master)

merge m:1 asxcode fyear using "`location'\turnover.dta", nogen keep(match master)

* Shorter names for the word counts and for revenue.
rename countwords total
rename countneg negative
rename countpos positive
rename countunc_lite uncertain

rename revenue sales

* Declare the panel once; the lag operators below rely on it.
egen firmid = group(asxcode)
tsset firmid hyear

*This is all done on a semi-annual basis now 
*ADJUSTMENT HERE BECAUSE DONT HAVE LEASING ASSETS IN 2021 YET 
* For 2020 rows where the lease item is present but was missing two half-years
* earlier, the lease item is subtracted. All 2021 values are set to missing.
generate fixedassets_adj = fixedassets
replace fixedassets_adj = fixedassets - leaseassets if year == 2020 & leaseassets != . & l2.leaseassets == .
replace fixedassets_adj = . if year == 2021

generate liabilities_adj = liab
replace liabilities_adj = liab - leaseliab if year == 2020 & leaseliab != . & l2.leaseliab == .
replace liabilities_adj = . if year == 2021

generate assets_adj = assets
replace assets_adj = assets - leaseassets if year == 2020 & leaseassets != . & l2.leaseassets == .
replace assets_adj = . if year == 2021

generate kstock = fixedassets_adj

replace sharesout = sharesout/1000000
replace outstanding_eikon = outstanding_eikon/1000
*Definition based on Gutiérrez and Philippon
generate mv_ms = shprice*sharesout
generate mv = shprice_eikon*outstanding_eikon


*Adjusted for leasing 
generate qratio = (mv + liabilities_adj - inventories)/assets_adj
*Should be excluding other assets too 


* Collapse the half-year rows to one row per firm and fiscal year.
* eps1 is merged in at the firm and fiscal-year level, so every half-year row
* of a fiscal year carries the same value; it is averaged, not summed, so it is
* not doubled when a fiscal year has two rows.
* NOTE(review): collapse (sum) returns 0 when every value in a group is
* missing; confirm that is acceptable for the summed items.
collapse (mean) total negative positive uncertain assets assets_adj qratio* liab kstock inventories turnover employee ///
         (sum)  capex kpurchase ebit ebitda sales ///
         (mean) eps1 ///
         (sum)  wagebill ///
         (max)  listyear listmonth, ///
         by(asxcode gics_sector gics_group fyear)

*Needs to be based on fiscal year for sentiment measure 
save "`location'\ms_sentiment_analysis.dta", replace

